{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Standard workflow for optimizing Tensorflow model to TensorRT\n",
    "\n",
    "![alt text](pictures/tf-trt_workflow.png)\n",
    "\n",
    "How to build this frozen model can be seen [here](https://github.com/ardianumam/tensorflow-yolov3), or you can just use this frozen model I already provide in the github page in the video description below.\n",
    "\n",
    "## Import needed modules"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Import the needed libraries\n",
    "import cv2\n",
    "import time\n",
    "import numpy as np\n",
    "import tensorflow as tf\n",
    "import tensorflow.contrib.tensorrt as trt\n",
    "from tensorflow.python.platform import gfile\n",
    "from PIL import Image\n",
    "from YOLOv3 import utils"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Optimize YOLOv3 frozen model to TensorRT model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# function to read a \".pb\" model \n",
    "# (can be used to read frozen model or TensorRT model)\n",
    "def read_pb_graph(model):\n",
    "  with gfile.FastGFile(model,'rb') as f:\n",
    "    graph_def = tf.GraphDef()\n",
    "    graph_def.ParseFromString(f.read())\n",
    "  return graph_def\n",
    "frozen_graph = read_pb_graph(\"./model/YOLOv3/yolov3_gpu_nms.pb\")\n",
    "\n",
    "your_outputs = [\"Placeholder:0\", \"concat_9:0\", \"mul_9:0\"]\n",
    "# convert (optimize) frozen model to TensorRT model\n",
    "trt_graph = trt.create_inference_graph(\n",
    "    input_graph_def=frozen_graph,# frozen model\n",
    "    outputs=your_outputs,\n",
    "    max_batch_size=1,# specify your max batch size\n",
    "    max_workspace_size_bytes=2*(10**9),# specify the max workspace\n",
    "    precision_mode=\"FP16\") # precision, can be \"FP32\" (32 floating point precision) or \"FP16\"\n",
    "\n",
    "#write the TensorRT model to be used later for inference\n",
    "with gfile.FastGFile(\"./model/YOLOv3/TensorRT_YOLOv3_2.pb\", 'wb') as f:\n",
    "    f.write(trt_graph.SerializeToString())\n",
    "print(\"TensorRT model is successfully stored!\")\n",
    "\n",
    "# check how many ops of the original frozen model\n",
    "all_nodes = len([1 for n in frozen_graph.node])\n",
    "print(\"numb. of all_nodes in frozen graph:\", all_nodes)\n",
    "\n",
    "# check how many ops that is converted to TensorRT engine\n",
    "trt_engine_nodes = len([1 for n in trt_graph.node if str(n.op) == 'TRTEngineOp'])\n",
    "print(\"numb. of trt_engine_nodes in TensorRT graph:\", trt_engine_nodes)\n",
    "all_nodes = len([1 for n in trt_graph.node])\n",
    "print(\"numb. of all_nodes in TensorRT graph:\", all_nodes)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Cofiguration"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# config\n",
    "SIZE = [416, 416] #input image dimension\n",
    "# video_path = 0 # if you use camera as input\n",
    "video_path = \"./dataset/demo_video/road2.mp4\" # path for video input\n",
    "classes = utils.read_coco_names('./YOLOv3/coco.names')\n",
    "num_classes = len(classes)\n",
    "GIVEN_ORIGINAL_YOLOv3_MODEL = \"./model/YOLOv3/yolov3_gpu_nms.pb\" # to use given original YOLOv3\n",
    "TENSORRT_YOLOv3_MODEL = \"./model/YOLOv3/TensorRT_YOLOv3_2.pb\" # to use the TensorRT optimized model"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Perform inference"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "WARNING:tensorflow:From /home/cvrc/TensorRT/YOLOv3/utils.py:225: FastGFile.__init__ (from tensorflow.python.platform.gfile) is deprecated and will be removed in a future version.\n",
      "Instructions for updating:\n",
      "Use tf.gfile.GFile.\n"
     ]
    }
   ],
   "source": [
    "# get input-output tensor\n",
    "input_tensor, output_tensors = \\\n",
    "utils.read_pb_return_tensors(tf.get_default_graph(),\n",
    "                             TENSORRT_YOLOv3_MODEL,\n",
    "                             [\"Placeholder:0\", \"concat_9:0\", \"mul_9:0\"])\n",
    "\n",
    "# perform inference\n",
    "with tf.Session(config=tf.ConfigProto(gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=0.5))) as sess:\n",
    "    vid = cv2.VideoCapture(video_path) # must use opencv >= 3.3.1 (install it by 'pip install opencv-python')\n",
    "    while True:\n",
    "        return_value, frame = vid.read()\n",
    "        if return_value == False:\n",
    "            print('ret:', return_value)\n",
    "            vid = cv2.VideoCapture(video_path)\n",
    "            return_value, frame = vid.read()\n",
    "        if return_value:\n",
    "            image = Image.fromarray(frame)\n",
    "        else:\n",
    "            raise ValueError(\"No image!\")\n",
    "            \n",
    "        img_resized = np.array(image.resize(size=tuple(SIZE)), \n",
    "                               dtype=np.float32)\n",
    "        img_resized = img_resized / 255.\n",
    "        prev_time = time.time()\n",
    "\n",
    "        boxes, scores = sess.run(output_tensors, \n",
    "                                 feed_dict={input_tensor: \n",
    "                                            np.expand_dims(\n",
    "                                                img_resized, axis=0)})\n",
    "        boxes, scores, labels = utils.cpu_nms(boxes, \n",
    "                                              scores, \n",
    "                                              num_classes, \n",
    "                                              score_thresh=0.4, \n",
    "                                              iou_thresh=0.5)\n",
    "        image = utils.draw_boxes(image, boxes, scores, labels, \n",
    "                                 classes, SIZE, show=False)\n",
    "\n",
    "        curr_time = time.time()\n",
    "        exec_time = curr_time - prev_time\n",
    "        result = np.asarray(image)\n",
    "        info = \"time:\" + str(round(1000*exec_time, 2)) + \" ms, FPS: \" + str(round((1000/(1000*exec_time)),1))\n",
    "        cv2.putText(result, text=info, org=(50, 70), \n",
    "                    fontFace=cv2.FONT_HERSHEY_SIMPLEX,\n",
    "                    fontScale=1, color=(255, 0, 0), thickness=2)\n",
    "        #cv2.namedWindow(\"result\", cv2.WINDOW_AUTOSIZE)\n",
    "        cv2.imshow(\"result\", result)\n",
    "        if cv2.waitKey(10) & 0xFF == ord('q'): break\n",
    "\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Notes:\n",
    "TensorRT optimizes a deep learning model specifically in the machine you use when performing TensorRT optimization. In other words, you cannot use the stored TensorRT model in the different GPU hardware, and you need to optimize the model directy there in that GPU machine."
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python [conda env:TF1120_GPU]",
   "language": "python",
   "name": "conda-env-TF1120_GPU-py"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
